#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-10-25 17:26:01
# Project: anjuke_com_comment

import json
import html
import time
import random
import requests
from pyspider.libs.base_handler import *

from src.mysqldb import SQL
from src.headers_switch import HeadersSelector
from src.utils import oss
from src.utils import get_proxy


class Handler(BaseHandler):
    """pyspider handler that stores Anjuke new-home review comments.

    ``on_start`` schedules review-list pages through the JS fetcher and
    ``detail_page`` turns every ``.total-revlist li`` entry of a fetched page
    into one row that is written to ``jjb_goods_house_comment`` with
    ``SQL.replace``.
    """

    crawl_config = {
        'itag': 'vsn0.0.3',
        'proxy': '',
    }

    # Raw ``Cookie`` header captured from a browser session.
    cookies = 'isp=true; aQQ_ajkguid=E696EB28-C520-DAC5-7EDD-SX1023163414; sessid=F26B68F4-D6B8-EAF3-380C-SX1023163414; isp=true; twe=2; 58tj_uuid=e55c837d-1a93-44c2-ab27-5e8d71c49bbd; als=0; Hm_lvt_c5899c8768ebee272710c9c5f365a6d8=1540283658; ajk_member_captcha=9863c41470623333db434acdb383b391; lps=http%3A%2F%2Flogin.anjuke.com%2Flogin%2Fiframeform%3Fstyle%3D1%26forms%3D10%26third_parts%3D000%26other_parts%3D000%26forget_pwd%3D0%26hidehead%3D1%26submit_text%3D%25E5%25BF%25AB%25E9%2580%259F%25E7%2594%25B3%25E8%25AF%25B7%26submiting_text%3D%25E6%258F%2590%25E4%25BA%25A4%25E4%25B8%25AD%26submiting_bg%3D%2523ddd%7Chttps%3A%2F%2Fhz.fang.anjuke.com%2Floupan%2F240349.html; ved_loupans=440330; ctid=42; init_refer=; new_uv=8; new_session=0; lp_lt_ut=111c1403f02e0114af25e9f9713c6dc7; Hm_lpvt_c5899c8768ebee272710c9c5f365a6d8=1540472070'

    # Off by default: requests are sent with an empty cookie dict. Set to True
    # to send the parsed content of ``cookies`` instead.
    send_cookies = False

    # Off by default: only ``start_page`` is scheduled. Set to True to schedule
    # every URL listed in ``seed_pages`` instead.
    crawl_all_seeds = False

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'

    # (house_id, url) scheduled when ``crawl_all_seeds`` is False.
    start_page = (
        440330,
        'https://zh.fang.anjuke.com/loupan/dianping-440330.html?type=news&from=dplist_dp_timelist',
    )

    # (house_id, review-list URLs) scheduled when ``crawl_all_seeds`` is True.
    seed_pages = (
        (258337, (
            'https://dg.fang.anjuke.com/loupan/dianping-258337.html?type=news&from=dplist_dp_timelist',
            'https://dg.fang.anjuke.com/loupan/dianping-258337.htmls/?type=news&from=dplist_dp_timelist&p=2',
        )),
        (414421, (
            'https://sh.fang.anjuke.com/loupan/dianping-414421.htmls/?type=news&from=dplist_dp_timelist',
            'https://sh.fang.anjuke.com/loupan/dianping-414421.htmls/?type=news&from=dplist_dp_timelist&p=2',
            'https://sh.fang.anjuke.com/loupan/dianping-414421.htmls/?type=news&from=dplist_dp_timelist&p=3',
            'https://sh.fang.anjuke.com/loupan/dianping-414421.htmls/?type=news&from=dplist_dp_timelist&p=4',
        )),
        (440330, (
            'https://zh.fang.anjuke.com/loupan/dianping-440330.htmls/?type=news&from=dplist_dp_timelist',
        )),
        (240349, (
            'https://hz.fang.anjuke.com/loupan/dianping-240349.htmls/?type=news&from=dplist_dp_timelist',
            'https://hz.fang.anjuke.com/loupan/dianping-240349.htmls/?type=news&from=dplist_dp_timelist&p=2',
        )),
    )

    # Avatar stored for comments whose markup carries no portrait image.
    default_avatar = 'https://jjlmobile.oss-cn-shenzhen.aliyuncs.com/goods_house/avatar/982436b30cc4011c24a56ecb8a2cc4cd.png'
    avatar_subpath = 'goods_house/avatar'

    def get_cookies(self):
        """Return the cookies to send with a crawl request.

        Returns:
            An empty dict while ``send_cookies`` is False (the default);
            otherwise a ``{name: value}`` dict parsed from ``self.cookies``.
            Segments without ``=`` are skipped, and when a name occurs more
            than once the last value wins.
        """
        if not self.send_cookies:
            return {}
        jar = {}
        for segment in self.cookies.split(';'):
            # partition splits on the first '=' only, so values may contain '='.
            name, separator, value = segment.partition('=')
            if not separator:
                continue
            jar[name.strip()] = value.strip()
        return jar

    def _new_headers(self, referer):
        """Return a fresh header dict from ``HeadersSelector`` with the given
        Referer and this handler's User-Agent set on it."""
        headers = HeadersSelector().select_header()
        headers['Referer'] = referer
        headers['User-Agent'] = self.user_agent
        return headers

    def _mirror_avatar(self, src, headers):
        """Return the avatar value to store for one comment.

        Args:
            src: the ``src`` attribute of the portrait image, or a falsy value
                when the comment has none.
            headers: header dict forwarded to ``oss``.

        Returns:
            ``default_avatar`` when ``src`` is falsy; otherwise whatever
            ``oss`` returns for the image (presumably the URL of the uploaded
            copy -- confirm against ``src.utils.oss``).
        """
        if not src:
            return self.default_avatar
        if src.startswith('//'):
            # Protocol-relative URL: give it an explicit scheme.
            src = 'http:' + src
        return oss(self.avatar_subpath, src, headers=headers)

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the review-list pages; re-run every 24 hours.

        By default only ``start_page`` is scheduled, with the result of
        ``get_cookies()``. With ``crawl_all_seeds`` set, every non-empty URL in
        ``seed_pages`` is scheduled instead, without cookies. Each task carries
        its ``house_id`` in ``save`` and is handled by ``detail_page``.
        """
        self.crawl_config['proxy'] = get_proxy()
        headers = self._new_headers(
            'https://zh.fang.anjuke.com/loupan/dianping-440330.html?from=dplist_dp_hotlist')
        js_script = """
            function() {
                setTimeout(function() {}, 3000);
            }
        """
        # NOTE(review): pyspider documents connect_timeout/timeout in seconds
        # (defaults 20 and 120); these values look like milliseconds. Left
        # unchanged -- confirm the intent before adjusting.
        fetch_options = {
            'callback': self.detail_page,
            'headers': headers,
            'fetch_type': 'js',
            'js_script': js_script,
            'connect_timeout': 5000,
            'timeout': 20000,
        }

        if not self.crawl_all_seeds:
            house_id, url = self.start_page
            self.crawl(url, save={'house_id': house_id},
                       cookies=self.get_cookies(), **fetch_options)
            return

        for house_id, urls in self.seed_pages:
            for url in urls:
                if not url:
                    # Guard against blank placeholders in the seed list.
                    continue
                self.crawl(url, save={'house_id': house_id}, **fetch_options)

    @config(priority=2)
    def detail_page(self, response):
        """Parse one review-list page and upsert every comment found on it.

        Args:
            response: pyspider response; ``response.save['house_id']`` is the
                id stored with each comment ('0' when it is missing).
        """
        # Updates the shared class-level crawl_config; no crawl is issued from
        # this callback itself.
        self.crawl_config['proxy'] = get_proxy()
        headers = self._new_headers(
            'https://dg.fang.anjuke.com/loupan/xiangce-258337/jst.html?from=click_xiangcelist_tab_shijing')

        # response.save is None when a task was scheduled without ``save``.
        save = response.save or {}
        house_id = save.get('house_id', '0')

        sql = SQL()
        comment_li = list(response.doc('.total-revlist li').items())
        print('comment_li len: ', len(comment_li))
        for item in comment_li:
            portrait = item.children('div .info-user .portrait img').attr('src')
            avatar = self._mirror_avatar(portrait, headers)
            comment = {
                'house_id': house_id,
                'author': item.children('div .info-mod div a .author').text(),
                # Key spelling kept as-is; it is passed to SQL.replace and may
                # match an existing column name -- verify before renaming.
                'author_atatar': avatar,
                'content': item.children('div div .part-text').text(),
                'pubdate': item.children('div div .tray-panel .date').text(),
                'praise': item.children('div div .tray-panel .share-praise .praise-link em').text(),
                'tram': item.children('div div .tray-panel .share-praise .tram-link em').text(),
            }
            print(comment)
            sql.replace('jjb_goods_house_comment', comment)
